Contents

0.1 Load Packages

library(DESeq2)
library(tximport)
library(biomaRt)
library(tidyverse)
library(ggplot2)
library(ggrepel)
library(readxl)
library(Rtsne)
library(pheatmap)
library(IHW)
library(dplyr)
library(grid)
library(gridExtra)
library(jyluMisc)
library(gtable)

0.2 bioMart

# ensembl.mart <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
# annotations <-
# getBM(
# attributes = c(
# 'hgnc_symbol',
# 'refseq_mrna',
# 'transcript_biotype'
# ),
# mart = ensembl.mart
# )
# annotations <- annotations %>% filter(hgnc_symbol != "" & refseq_mrna != "")

0.3 Read Counts from Salmon

# files <-
#   file.path("../data/alignments/",
#   list.files("../data/alignments"),
#   "quant.sf")
#   sample.ids <-
#   list.files("../data/alignments") %>% map(function(x)
#   gsub("_sequence", "", strsplit(x, split = "lane[[:digit:]]+")[[1]][2]))
#   names(files) <- sample.ids
#   tx2gene <- annotations %>% dplyr::select(refseq_mrna, hgnc_symbol)
#   txi.salmon <-
#   tximport(files,
#   type = "salmon",
#   tx2gene = tx2gene,
#   ignoreTxVersion = TRUE)

0.4 DESeq Dataset

# sample.table <-
#   data.frame(row.names = sample.ids,
#   fileName = list.files("../data/alignments/"))
# dds <- DESeqDataSetFromTximport(txi.salmon, sample.table, design = ~1)

0.5 Annotating Samples

# sample.list <-
#   read_xlsx("../data/metadata/sampleList.xlsx", sheet = 2)
#   plate <- sample.list$`Plate well number`
#   id <- sample.list$ID
#   sample.name <- sample.list$`Sample Name`
#   cell_line <-
#   sample.name %>% map(function(x)
#   strsplit(x, split = "_")[[1]][1])
#   cell_line <- unlist(cell_line)
#   treatment <-
#   sample.name %>% map(function(x)
#   strsplit(x, split = "_")[[1]][2])
#   treatment <- unlist(treatment)
#   treatment <- replace(treatment, treatment == "NTC-1-old", "NTC-1")
#   repl <-
#   sample.name %>% map(function(x)
#   strsplit(x, split = "_")[[1]][3])
#   repl <- unlist(repl)
#   sample.annotations <-
#   data.frame(
#   row.names = id,
#   cell_line = as.character(cell_line),
#   treatment = as.character(treatment),
#   replicate = repl,
#   plate = plate
#   )
#   write.csv(sample.annotations, file = "../data/metadata/sample_annotations.csv", row.names = T)
#   sample.annotations <-
#   sample.annotations[sample.annotations$cell_line != "OCI-AML2", ]
#   colData(dds) <-
#   cbind(colData(dds), sample.annotations[rownames(colData(dds)), ])
#   dds$lane <- c(rep("2", 30), rep("3", 24))

0.6 Annotating Transcripts

# gene.annotations <- annotations[match(rownames(dds), annotations$hgnc_symbol),]
# rownames(gene.annotations) <- rownames(dds)
# rowData(dds) <- gene.annotations
# dds <- dds[,dds@colData$cell_line != "OCI-AML2"]
# save(dds, file = "../data/RData/3RNAseq.RData")
sample.ids <- list.files("../data/alignments") %>% map(function(x) gsub("_sequence", "", strsplit(x, split ="lane[[:digit:]]+")[[1]][2]))
sample.annotations <- read.csv("../data/metadata/sample_annotations.csv", row.names = 1)
sample.ids <- unlist(sample.ids)
sample.ids <- sample.ids[sample.annotations[sample.ids, "cell_line"] != "OCI-AML2"]
sample.annotations <- sample.annotations[sample.annotations$cell_line != "OCI-AML2", ]
load("../data/RData/3RNAseq.RData")

0.7 PCA on Expression Matirix

sample.ids <- sample.ids[which(dds$treatment %in% c("AK2-3D", "NTC-1"))]
dds <- dds[, dds$treatment %in% c("AK2-3D", "NTC-1")]

dds$treatment <- droplevels(dds$treatment)
dds$cell_line <- droplevels(dds$cell_line)
dds <- estimateSizeFactors(dds)
dds <- dds[which(rowSums(counts(dds)) > 50), ]
# dds.vst <- varianceStabilizingTransformation(dds, blind = TRUE)
dds.vst <- dds
exprMat <- assay(dds.vst)
sds <- rowSds(exprMat)
exprMat <- exprMat[order(sds, decreasing = TRUE)[1:5000], ]
pcRes <- prcomp(t(exprMat), scale. = FALSE, center = TRUE)
eigs <- pcRes$sdev ^ 2
eigs <- eigs / sum(eigs)

plotTab <- data.frame(pcRes$x[, c(1, 2)])
plotTab$sampleID <- rownames(plotTab)
plotTab$cellLine <-
sample.annotations[plotTab$sampleID, "cell_line"]
plotTab$treatment <-
sample.annotations[plotTab$sampleID, "treatment"]

ggplot(plotTab,
aes(
x = PC1,
y = PC2,
label = sampleID,
color = cellLine,
shape = treatment
)) +
geom_point() +
geom_text_repel(aes(PC1, PC2, label = sampleID)) +
theme_bw()

0.8 t-SNE on Expression Matirix

tsneRun <- function(distMat,
                    perplexity = 10,
                    theta = 0,
                    max_iter = 5000) {
                    tsneRes <- Rtsne(
                    distMat,
                    perplexity = perplexity,
                    theta = theta,
                    max_iter = max_iter,
                    is_distance = TRUE,
                    dims = 2
                    )
                    tsneRes <- tsneRes$Y
                    rownames(tsneRes) <- labels(distMat)
                    colnames(tsneRes) <- c("x", "y")
                    tsneRes
                    }
                    
                    distViab <- dist(t(exprMat))
                    
                    plotTab <-
                    data.frame(tsneRun(distViab, perplexity = 2, max_iter = 10000))
                    
                    #plot
                    plotTab$sampleID <- rownames(plotTab)
                    plotTab$cellLine <-
                    sample.annotations[plotTab$sampleID, "cell_line"]
                    plotTab$treatment <-
                    sample.annotations[plotTab$sampleID, "treatment"]
                    
                    ggplot(plotTab,
                    aes(
                    x = x,
                    y = y,
                    label = sampleID,
                    color = cellLine,
                    shape = treatment
                    )) +
                    geom_point() +
                    geom_text_repel(aes(x, y, label = sampleID)) +
                    theme_bw()

0.9 Sample Similarities

colAnno <- data.frame(plotTab[,c("cellLine", "treatment")])
corMat <- cor(exprMat)
pheatmap(corMat, annotation_col = colAnno)

0.10 Differential Expressed Genes between Treatments

dds$id <- unlist(sample.ids)
dds$cell_line <- sample.annotations[dds$id, "cell_line"]
dds$treatment <- sample.annotations[dds$id, "treatment"]
dds$treatment <- droplevels(dds$treatment)
dds$cell_line <- droplevels(dds$cell_line)
design(dds) <- ~ cell_line

# ddsCell <- DESeq(dds)
# results.cell <- results(ddsCell, contrast = c("cell_line" , "Raji", "Namalwa"))
# results.df <- as.data.frame(results.cell)
# summary(results.df)
# res.05 <- results(ddsCell, alpha = 0.05)
# table(res.05$padj < 0.05)
# resIHW <- results(ddsCell, filterFun=ihw)
# summary(resIHW)
results.cell <- list()
dds$lane <- factor(dds$lane)
dds.cells <-
unique(dds$cell_line[dds$treatment %in% c("NTC-1", "AK2-3D")]) %>% map(function(x)
dds[, dds$cell_line == x]) %>% map(function(x)
x[apply(counts(x), 1, function(x)
all(x > 10)), ])

dl <- function(x) {
x$treatment <- droplevels(x$treatment)
x$lane <- droplevels(x$lane)
if (length(levels(x$lane)) > 1) {
design(x) <- ~ treatment + lane
} else{
design(x) <- ~ treatment
}
x
}
dds.cells <- dds.cells %>% map(dl)
results.cell <-
dds.cells %>% map(function(x)
DESeq(x, betaPrior = T)) %>% map(function(x)
results(x, contrast = c("treatment", "NTC-1", "AK2-3D")))
names(results.cell) <-
unique(dds$cell_line[dds$treatment %in% c("NTC-1", "AK2-3D")])
null <-
names(results.cell) %>% map(function(x)
write.csv(results.cell[[x]], paste0("../results/DEG_cell_line/", x, ".csv")))
top.genes <-
results.cell %>% map(function(df)
rownames(df[df$pvalue < 0.05 & abs(df$log2FoldChange) > 0.5, ]))

0.11 P-Value Distributions for Treatment

plotList <- list()
results.cell %>% map(function(x)
tibble(pval = x$pvalue)) %>% map(
function(x)
ggplot(x, aes(pval)) + geom_histogram(
bins = 20,
fill = "blue",
col = "blue",
alpha = 0.3
) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5)) +
xlab("raw p values")
) -> plots

plotList <-
1:length(plots) %>% map(function(x)
plots[[x]] + ggtitle(sprintf(
"NTC-1 vs AK2-3D in %s", names(results.cell)[x]
)))
names(plotList) <- names(results.cell)
grid.arrange(grobs = plotList, ncol = 2)

0.12 Enrichment Analysis for Different Treatments

createInput <-
  function(DEres,
  pCut = 0.05,
  ifFDR = FALSE,
  rankBy = "stat") {
  if (ifFDR) {
  inputTab <-
  data.frame(DEres) %>% rownames_to_column(var = "symbol") %>% filter(padj <= pCut)
  } else {
  inputTab <-
  data.frame(DEres) %>% rownames_to_column(var = "symbol") %>% filter(pvalue <= pCut)
  }
  
  inputTab <-
  arrange(inputTab, pvalue) %>% filter(!duplicated(symbol)) %>% select_("symbol", rankBy) %>% data.frame(stringsAsFactors = FALSE)
  rownames(inputTab) <- inputTab$symbol
  inputTab$symbol <- NULL
  colnames(inputTab) <- "stat"
  return(inputTab)
  }
  
  gmts <-
  list(
  H = system.file("extdata", "h.all.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"),
  C6 = system.file("extdata", "c6.all.v5.1.symbols.gmt", package =
  "BloodCancerMultiOmics2017"),
  KEGG = system.file("extdata", "c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017")
  )
  
  
  results.cell %>% map(function(x)
  createInput(x, pCut = 0.1, ifFDR = FALSE)) %>% map(function(x)
  names(gmts) %>% map(
  function(y)
  runGSEA(
  inputTab = x,
  gmtFile = gmts[[y]],
  GSAmethod = "page"
  )
  )) -> p
  
  p %>% map(function(x)
  x[[1]]) -> res.Halmark
  p %>% map(function(x)
  x[[2]]) -> res.Oncogenetic
  p %>% map(function(x)
  x[[3]]) -> res.KEGG
  enBar.Halmark <-
  plotEnrichmentBar(res.Halmark,
  pCut = 0.05,
  ifFDR = FALSE,
  setName = "HALLMARK")
  plot(enBar.Halmark)

  enBar.Oncogenetic <-
  plotEnrichmentBar(res.Oncogenetic,
  pCut = 0.05,
  ifFDR = FALSE,
  setName = "Oncogenetic")
  plot(enBar.Oncogenetic)

  enBar.KEGG <-
  plotEnrichmentBar(res.KEGG,
  pCut = 0.05,
  ifFDR = FALSE,
  setName = "KEGG")
  plot(enBar.KEGG)

0.13 P-Value Distributions for all Cell Lines

dds <- dds[,dds$treatment %in% c("NTC-1", "AK2-3D")]
dds$treatment <- droplevels(dds$treatment)
dds$cell_line <- droplevels(dds$cell_line)
design(dds) <- ~ cell_line + treatment
dds <- DESeq(dds)
res.all <- results(dds, contrast = c("treatment", "NTC-1", "AK2-3D"))
plotTab <- tibble(pval = res.all$pvalue)
p <- ggplot(plotTab, aes(pval)) + geom_histogram(bins = 20, fill = "blue", col="blue", alpha=0.3) +
  theme_bw() + theme(plot.title = element_text(hjust =0.5)) +
  ggtitle("NTC-1 vs AK2-3D in all cell lines") +
  xlab("raw p values")
p

0.14 Enrichment Analysis for Different Treatments for all Cell Lines

inputTab <- createInput(res.all, pCut = .1, ifFDR = F)
enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
enRes$Name <- gsub("HALLMARK_","", enRes$Name)
enRes <- list(enRes)
names(enRes) <- "NTC-1 vs AK2-3D"
enBar <- plotEnrichmentBar(enRes, pCut = 0.1, ifFDR = FALSE, setName = "HALLMARK")
plot(enBar)

inputTab <- createInput(res.all, pCut = .1, ifFDR = F)
enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$C6, GSAmethod = "page")
enRes$Name <- gsub("Oncogenetic_","", enRes$Name)
enRes <- list(enRes)
names(enRes) <- "NTC-1 vs AK2-3D"
enBar <- plotEnrichmentBar(enRes, pCut = 0.1, ifFDR = FALSE, setName = "Oncogenetic")
plot(enBar)

inputTab <- createInput(res.all, pCut = .1, ifFDR = F)
enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$C6, GSAmethod = "page")
enRes$Name <- gsub("KEGG_","", enRes$Name)
enRes <- list(enRes)
names(enRes) <- "NTC-1 vs AK2-3D"
enBar <- plotEnrichmentBar(enRes, pCut = .1, ifFDR = FALSE, setName = "KEGG")
plot(enBar)

hv.genes <- function(df, n = 500){
  sds <- apply(df, 1, sd)
  means <- apply(df, 1, mean)
  sds <- unlist(sds)
  means <- unlist(means)
  lm <- lm(sds~means)
  sm <- summary(lm)
  res <- residuals(lm)
  names(res) <- 1:length(res)
  down <- tail(order(res), n)
  down
}

hv.genes <- function(df, n = 500){
  sds <- apply(df, 1, sd)
  down <- head(order(sds), n)
  down
}

cell_lines <- unique(dds$cell_line)
heat.map <- function(cell_line){
  line.indices <- which(dds$cell_line == cell_line)
  colAnno <- data.frame(dds$treatment[line.indices])
  rownames(colAnno) <- colnames(dds)[which(dds$cell_line == cell_line)]
  print(colAnno)
  to.plot <- exprMat[hv.genes(exprMat),]
  to.plot <- t(scale(t(to.plot)))
  pheatmap(to.plot[,line.indices], annotation_col  = colAnno, main = cell_line, show_rownames = F)
}
cell_lines %>% map(heat.map)
##     dds.treatment.line.indices.
## K10                      AK2-3D
## K11                       NTC-1
## K12                      AK2-3D
## K7                        NTC-1
## K8                       AK2-3D
## K9                        NTC-1

##     dds.treatment.line.indices.
## K13                       NTC-1
## K14                      AK2-3D
## K15                       NTC-1
## K16                      AK2-3D
## K17                       NTC-1
## K18                      AK2-3D

##    dds.treatment.line.indices.
## K1                       NTC-1
## K2                      AK2-3D
## K3                       NTC-1
## K4                      AK2-3D
## K5                       NTC-1
## K6                      AK2-3D

##     dds.treatment.line.indices.
## K43                       NTC-1
## K44                      AK2-3D
## K45                       NTC-1
## K46                      AK2-3D
## K47                       NTC-1
## K48                      AK2-3D
## K49                       NTC-1
## K50                      AK2-3D
## K51                       NTC-1
## K52                      AK2-3D

##     dds.treatment.line.indices.
## K19                       NTC-1
## K20                      AK2-3D
## K21                       NTC-1
## K22                      AK2-3D
## K23                       NTC-1
## K24                      AK2-3D

##     dds.treatment.line.indices.
## K25                       NTC-1
## K26                      AK2-3D
## K27                       NTC-1
## K28                      AK2-3D
## K29                       NTC-1
## K30                      AK2-3D

##     dds.treatment.line.indices.
## K37                       NTC-1
## K38                      AK2-3D
## K39                       NTC-1
## K40                      AK2-3D
## K41                       NTC-1
## K42                      AK2-3D

## [[1]]
## 
## [[2]]
## 
## [[3]]
## 
## [[4]]
## 
## [[5]]
## 
## [[6]]
## 
## [[7]]
pointer <- function(cell_line) {
  line.indices <- which(dds$cell_line == cell_line)
  colAnno <- dds$treatment[line.indices]
  to.plot <- exprMat["AK2", line.indices]
  df <- data.frame(y = to.plot, x = factor(colAnno))
  print(df)
  ggplot(df , aes(x, y,
  fill = factor(colAnno),
  col = factor(colAnno)
  )) + geom_point() + labs(title = cell_line)
}
cell_lines %>% map(pointer)
##       y      x
## K10 247 AK2-3D
## K11 783  NTC-1
## K12 377 AK2-3D
## K7  362  NTC-1
## K8  303 AK2-3D
## K9  376  NTC-1
##       y      x
## K13 244  NTC-1
## K14 264 AK2-3D
## K15 266  NTC-1
## K16 239 AK2-3D
## K17 479  NTC-1
## K18 338 AK2-3D
##      y      x
## K1 198  NTC-1
## K2  88 AK2-3D
## K3 302  NTC-1
## K4  75 AK2-3D
## K5 199  NTC-1
## K6  55 AK2-3D
##       y      x
## K43 385  NTC-1
## K44 198 AK2-3D
## K45 307  NTC-1
## K46  86 AK2-3D
## K47 241  NTC-1
## K48 135 AK2-3D
## K49 246  NTC-1
## K50 251 AK2-3D
## K51 139  NTC-1
## K52 134 AK2-3D
##       y      x
## K19 502  NTC-1
## K20 222 AK2-3D
## K21 473  NTC-1
## K22 317 AK2-3D
## K23 441  NTC-1
## K24 205 AK2-3D
##       y      x
## K25 408  NTC-1
## K26 268 AK2-3D
## K27 381  NTC-1
## K28 264 AK2-3D
## K29 594  NTC-1
## K30 316 AK2-3D
##       y      x
## K37 237  NTC-1
## K38 217 AK2-3D
## K39 643  NTC-1
## K40 446 AK2-3D
## K41 597  NTC-1
## K42 400 AK2-3D
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

library(Vennerable)
vennList<- Venn(top.genes[c("Z138", "Nomo1", "BL60", "THP1")])
gVenn <- compute.Venn(vennList, doWeights  = TRUE)
plot(vennList, type = "ellipses")
## Warning in compute.E4(V, doWeights): Cant do a weighted E4